# Load necessary librarieslibrary(raster)library(here)library(sp)library(dplyr)library(tidyr)# Load the data ----load(here::here("Data", "GAP analyses", "puvsp_marine.Rdata"))# Load the raster layersmpa_ALL <- raster::raster(here::here("Data", "GAP analyses", "mpa_ALL_binary.tif"))mpa_NT <- raster::raster(here::here("Data", "GAP analyses", "mpa_NT_binary.tif"))
GAP analyses
GAP analyses of shark and ray range overlaps with Marine Protected Areas
Code
# Convert species dataframe to spatial pointsspecies_points <-SpatialPointsDataFrame(coords = puvsp_marine[, c("lon", "lat")], data = puvsp_marine, proj4string =CRS(projection(mpa_ALL)))# Extract MPA values for each species pointmpa_ALL_values <- raster::extract(mpa_ALL, species_points)mpa_NT_values <- raster::extract(mpa_NT, species_points)# Add MPA values to the species dataframepuvsp_marine$mpa_ALL_present <- mpa_ALL_valuespuvsp_marine$mpa_NT_present <- mpa_NT_values# Function to calculate percentage of range in MPAcalculate_mpa_percentage <-function(species_column, mpa_column) { species_presence <- species_column ==1# Assuming 1 indicates species presence total_range <-sum(species_presence, na.rm =TRUE) range_in_mpa <-sum(species_presence & mpa_column ==1, na.rm =TRUE)if (total_range ==0) {return(NA) # Return NA if the species is not present anywhere } percentage <- (range_in_mpa / total_range) *100return(percentage)}# Apply function to all species columns for both MPA typesspecies_columns <-4:(ncol(puvsp_marine) -2) # Assuming species columns start at 4mpa_ALL_percentages <-sapply(puvsp_marine[, species_columns], function(x) calculate_mpa_percentage(x, puvsp_marine$mpa_ALL_present))mpa_NT_percentages <-sapply(puvsp_marine[, species_columns], function(x) calculate_mpa_percentage(x, puvsp_marine$mpa_NT_present))# Create a dataframe with resultsresults <-data.frame(species =names(mpa_ALL_percentages),percentage_in_ALL_MPA = mpa_ALL_percentages,percentage_in_NT_MPA = mpa_NT_percentages)# Remove any NA resultsresults <- results[!is.na(results$percentage_in_ALL_MPA) &!is.na(results$percentage_in_NT_MPA), ]# Sort results by percentage in ALL MPAs (descending)results <- results[order(-results$percentage_in_ALL_MPA), ]# Display top 10 species#print(head(results, 10))# Summary statisticscat("\nSummary Statistics for ALL MPAs:\n")
Summary Statistics for ALL MPAs:
Code
print(summary(results$percentage_in_ALL_MPA))
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.000 3.906 9.524 15.324 17.663 100.000
Code
cat("\nSummary Statistics for No-Take MPAs:\n")
Summary Statistics for No-Take MPAs:
Code
print(summary(results$percentage_in_NT_MPA))
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.0000 0.0000 0.7836 2.5746 2.8294 50.0000
Code
# Save results to CSV#write.csv(results, "species_mpa_coverage_ALL_and_NT.csv", row.names = FALSE)
Null model of MPA placement within EEZs
Null model of MPA placement and overlaps with the range of sharks and rays
Code
library(raster)library(here)library(sp)library(sf)library(parallel)library(pbapply)# Load the bathymetry rasterBathy <-raster(here::here("Data", "bathymetry-0.1deg-adjusted.tif"))# Create marine mask from bathymetry (areas < 0)marine_mask <- Bathy <0# Resample marine mask to match MPA raster resolutionmarine_mask_resampled <- raster::resample(marine_mask, mpa_ALL, method ="ngb")# Ensure marine_mask_resampled is binary (0 or 1)marine_mask_resampled <- raster::reclassify(marine_mask_resampled, c(-Inf, 0.5, 0, 0.5, Inf, 1))# Load the EEZ shapefileeez <-st_read(here("Data", "World_EEZ_v12_20231025", "eez_v12.shp"), quiet =TRUE)# Function to create a raster mask of EEZs with unique values for each countrycreate_eez_mask <-function(template_raster) {# Ensure EEZ has a unique identifier for each country eez$country_id <-as.numeric(as.factor(eez$SOVEREIGN1))# Rasterize the EEZ shapefile using the country_id eez_raster <-rasterize(eez, template_raster, field ="country_id")return(eez_raster)}# Updated create_random_mpa function to randomize within each country's EEZcreate_random_mpa <-function(template_raster, marine_mask, eez_mask) {# Ensure template_raster is binary template_raster <- raster::reclassify(template_raster, c(-Inf, 0.5, 0, 0.5, Inf, 1))# Combine marine_mask and eez_mask combined_mask <- marine_mask * (eez_mask >0)# Get unique country IDs country_ids <-unique(eez_mask[!is.na(eez_mask)]) country_ids <- country_ids[country_ids >0]# Initialize random raster random_raster <- template_raster random_raster[] <-NAfor (country_id in country_ids) {# Create mask for current country country_mask <- eez_mask == country_id# Count original marine MPA cells within current country's EEZ original_mpa_cells <-sum(template_raster[] ==1& country_mask[] &!is.na(combined_mask[]), na.rm =TRUE)# Get all valid cells for current country (marine areas within EEZ) valid_cells <-which(country_mask[] &!is.na(combined_mask[])) total_valid_cells <-length(valid_cells)if (original_mpa_cells >0&& total_valid_cells >0) {if (original_mpa_cells > total_valid_cells) {warning(paste("More MPA cells than valid marine cells for country ID", country_id, ". Adjusting MPA cell count.")) original_mpa_cells <- total_valid_cells }# Randomly select valid cells to be MPAs for current country mpa_cells <-sample(valid_cells, size =min(original_mpa_cells, total_valid_cells), replace =FALSE)# Update random raster random_raster[valid_cells] <-0 random_raster[mpa_cells] <-1 } }return(random_raster)}# Calculate MPA percentage functioncalculate_mpa_percentage <-function(species_values, mpa_values) { total_cells <-sum(!is.na(species_values)) mpa_cells <-sum(mpa_values ==1&!is.na(species_values), na.rm =TRUE) percentage <- (mpa_cells / total_cells) *100return(percentage)}# Main analysis functionrun_random_mpa_analysis <-function(species_data, mpa_raster, marine_mask, eez_mask, n_iterations =100) {# Convert species dataframe to spatial points species_points <- sp::SpatialPointsDataFrame(coords = species_data[, c("lon", "lat")], data = species_data, proj4string = sp::CRS(raster::projection(mpa_raster)))# Prepare results storage species_columns <-4:(ncol(species_data) -1) # Adjust if needed all_results <-matrix(nrow =length(species_columns), ncol = n_iterations)rownames(all_results) <-names(species_data)[species_columns]# Use pblapply for parallel processing with a progress bar all_results <-pblapply(1:n_iterations, function(i) {set.seed(i) # Set seed for reproducibility random_mpa <-create_random_mpa(mpa_raster, marine_mask, eez_mask) random_mpa_values <- raster::extract(random_mpa, species_points)sapply(species_data[, species_columns], function(x) calculate_mpa_percentage(x, random_mpa_values)) }, cl =detectCores() -1) # Use one less than available cores all_results <-do.call(cbind, all_results)# Calculate mean and standard deviation for each species mean_percentages <-rowMeans(all_results, na.rm =TRUE) sd_percentages <-apply(all_results, 1, sd, na.rm =TRUE) results <-data.frame(species =rownames(all_results),mean_percentage = mean_percentages,sd_percentage = sd_percentages )return(results)}# Function to process resultsprocess_results <-function(results, mpa_type) {# Sort results by mean_percentage in descending order results <- results[order(-results$mean_percentage), ]# Calculate summary statistics summary_stats <-summary(results$mean_percentage)# Write results to CSVwrite.csv(results, here::here("Outputs",paste0("species_random_", mpa_type, "_coverage.csv")), row.names =FALSE)# Return a list containing the processed datareturn(list(top_10 =head(results, 10),summary_stats = summary_stats,all_results = results ))}# Create the EEZ mask with unique country IDseez_mask <-create_eez_mask(mpa_ALL)# Run the analysis with the country-specific EEZ constrainttryCatch({ random_results_ALL <-run_random_mpa_analysis(puvsp_marine, mpa_ALL, marine_mask_resampled, eez_mask, n_iterations =100)print("Analysis for ALL MPAs completed successfully.")}, error =function(e) {print(paste("Error in ALL MPAs analysis:", e$message))})
[1] "Analysis for ALL MPAs completed successfully."
[1] "Analysis for No-Take MPAs completed successfully."
Code
# Process results for both MPA typesprocessed_results <-list()if (exists("random_results_ALL")) { processed_results[["ALL MPAs"]] <-process_results(random_results_ALL, "ALL MPAs")}if (exists("random_results_NT")) { processed_results[["No-take MPAs"]] <-process_results(random_results_NT, "No-take MPAs")}
Important note: This NULL model randomly distributes protected grid cells at a 0.5-degree resolution within country’s EEZs. However, it does not preserve MPA shape.
Compare results
Compare results between the MPA network and the Null model of MPA placement
Code
# Compare results ---- library(dplyr)library(tidyr)library(ggplot2)# Load actual MPA coverage dataactual_coverage <-read.csv(here::here("Outputs", "species_mpa_coverage_ALL_and_NT.csv"))colnames(actual_coverage)[2:3]=c("ALL","NT")# Load random MPA coverage resultsrandom_ALL <-read.csv(here::here("Outputs", "species_random_ALL MPAs_coverage.csv"))random_NT <-read.csv(here::here("Outputs", "species_random_No-take MPAs_coverage.csv"))# Reshape actual coverage data to long formatactual_long <- actual_coverage %>%pivot_longer(cols =c(ALL, NT), names_to ="mpa_type", values_to ="actual_percentage")# Function to merge and compare actual vs random datacompare_coverage <-function(actual_data, random_data, mpa_type) { comparison <- actual_data %>%filter(mpa_type ==!!mpa_type) %>%left_join(random_data, by ="species") %>%mutate(difference = actual_percentage - mean_percentage,z_score = (actual_percentage - mean_percentage) / sd_percentage)return(comparison)}# Create comparison dataframescomparison_ALL <-compare_coverage(actual_long, random_ALL, "ALL")comparison_NT <-compare_coverage(actual_long, random_NT, "NT")# Function to summarize and print resultslibrary(knitr)library(kableExtra)library(knitr)library(kableExtra)summarize_results <-function(comparison_data, mpa_type) { over_represented <-mean(comparison_data$difference >0, na.rm=TRUE) *100 under_represented <-mean(comparison_data$difference <0, na.rm=TRUE) *100 equally_represented <-100- over_represented - under_represented summary_df <-data.frame(Representation =c("Over-represented", "Under-represented", "Equally represented"),Percentage =round(c(over_represented, under_represented, equally_represented), 2) )kable(summary_df, format ="html", col.names =c("Representation", "Percentage (%)"),caption =paste("Summary for", mpa_type, "MPAs")) %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed", "responsive"),full_width =FALSE) %>%column_spec(2, width ="100px") %>%row_spec(0, bold =TRUE) %>%add_header_above(c(" "=1, "Species"=1)) %>%footnote(general ="Percentages may not sum to 100% due to rounding.")}# Summarize resultssummarize_results(comparison_ALL, "ALL")
Top 5 Significantly Over/Under-represented Species in ALL MPAs
Species
Actual %
Random %
Difference
Z-score
Representation
Over-represented
Pseudocarcharias kamoharai
8.33
6.80
1.53
24.75
Over-represented
Megachasma pelagios
9.27
8.00
1.27
24.20
Over-represented
Mobula thurstoni
8.58
7.55
1.03
22.21
Over-represented
Alopias pelagicus
9.60
8.18
1.42
21.68
Over-represented
Mobula tarapacana
8.11
7.05
1.05
21.65
Over-represented
Under-represented
Amblyraja hyperborea
8.63
16.13
-7.49
-33.90
Under-represented
Mobula hypostoma
4.35
10.87
-6.52
-25.16
Under-represented
Sphyrna mokarran
9.45
14.43
-4.98
-24.65
Under-represented
Apristurus exsanguis
9.81
42.51
-32.70
-24.35
Under-represented
Etmopterus molleri
8.24
27.75
-19.51
-24.02
Under-represented
Note:
Total significantly over-represented species: 280
Total significantly under-represented species: 449
Code
significant_species(comparison_NT, "No-Take")
Top 5 Significantly Over/Under-represented Species in No-Take MPAs
Species
Actual %
Random %
Difference
Z-score
Representation
Over-represented
Narke capensis
0.78
0.00
0.78
Inf
Over-represented
Pseudoginglymostoma brevicaudatum
0.98
0.00
0.98
Inf
Over-represented
Trigonognathus kabeyai
44.27
2.60
41.67
32.80
Over-represented
Apristurus spongiceps
28.74
2.44
26.30
26.41
Over-represented
Parmaturus albimarginatus
5.00
0.03
4.97
19.90
Over-represented
Under-represented
Carcharhinus obscurus
3.13
6.05
-2.92
-13.82
Under-represented
Hypogaleus hyugaensis
5.69
16.07
-10.38
-13.69
Under-represented
Sphyrna lewini
1.58
2.99
-1.41
-13.66
Under-represented
Carcharias taurus
2.04
6.09
-4.05
-13.49
Under-represented
Sphyrna mokarran
1.41
2.58
-1.17
-12.15
Under-represented
Note:
Total significantly over-represented species: 133
Total significantly under-represented species: 365
Key message: >60% of species are under-represented by the global MPA network (i.e. less protected by the current MPA network than by a random placement of MPAs).
Map results of the GAP analyses
Map Standardised Effect Sizes of MPA coverage
Code
#For ALL MPAs difference_sp=comparison_ALL[,c(1,3,6)]#Plot the difference # Load necessary librarieslibrary(dplyr)library(tidyr)library(ggplot2)#SES:# Step 1: Calculate SES for each species in comparison_ALLcomparison_ALL <- comparison_ALL %>%mutate(SES = (actual_percentage - mean_percentage) / sd_percentage)# Assuming you have a similar species-to-grid-cell mapping like puvsp_marine# Reshape puvsp_marine from wide to long format (if needed)puvsp_long <- puvsp_marine %>%pivot_longer(cols =-c(id, lon, lat), names_to ="species", values_to ="presence") %>%filter(!is.na(presence) & presence ==1) # Only keep rows where species is present# Join SES data (comparison_ALL) to puvsp_longcombined_data <- puvsp_long %>%left_join(comparison_ALL, by =c("species"="species"))# Step 3: Group by grid cell (id, lon, lat) and calculate mean SESmean_ses_per_cell <- combined_data %>%group_by(id, lon, lat) %>%summarise(mean_SES =mean(SES, na.rm =TRUE)) %>%ungroup()# Load necessary librarieslibrary(ggplot2)library(rnaturalearth)library(sf)# Step 1: Get land polygons from rnaturalearthland <-ne_countries(scale ="medium", returnclass ="sf")# Step 2: Plot the mean SES with a diverging color scaleg_ses =ggplot() +geom_tile(data = mean_ses_per_cell, aes(x = lon, y = lat, fill = mean_SES)) +geom_sf(data = land, fill ="darkgrey", color =NA) +# Add land in dark greyscale_fill_gradient2(low ="#2166ac", mid ="#f0f0f0", high ="#b2182b", midpoint =0# Blue-White-Red gradient, with midpoint at 0 ) +coord_sf(xlim =c(-180, 180), ylim =c(-90, 90), expand =FALSE) +# Set global coordinatestheme_minimal() +theme(legend.position ="bottom", # Position the legend at the bottomlegend.title =element_text(hjust =0.5), # Center the legend titlelegend.key.width =unit(3, "cm"), # Adjust width of the legend barlegend.key.height =unit(0.5, "cm") # Adjust height of the legend bar ) +guides(fill =guide_colorbar(direction ="horizontal", # Make the legend horizontaltitle.position ="top", # Move the title to the top of the legendtitle.hjust =0.5# Center the title horizontally ) ) +labs(title ="Standardized Effect Size (SES) of Observed vs. Null MPA Coverage",x ="Longitude",y ="Latitude",fill ="Mean SES")print(g_ses)
Code
#For No-take MPAs difference_sp=comparison_NT[,c(1,3,6)]#SES# Step 1: Calculate SES for each species in comparison_ALLcomparison_NT <- comparison_NT %>%mutate(SES = (actual_percentage - mean_percentage) / sd_percentage)# Assuming you have a similar species-to-grid-cell mapping like puvsp_marine# Reshape puvsp_marine from wide to long format (if needed)puvsp_long <- puvsp_marine %>%pivot_longer(cols =-c(id, lon, lat), names_to ="species", values_to ="presence") %>%filter(!is.na(presence) & presence ==1) # Only keep rows where species is present# Join SES data (comparison_ALL) to puvsp_longcombined_data <- puvsp_long %>%left_join(comparison_NT, by =c("species"="species"))# Step 3: Group by grid cell (id, lon, lat) and calculate mean SESmean_ses_per_cell <- combined_data %>%group_by(id, lon, lat) %>%summarise(mean_SES =mean(SES, na.rm =TRUE)) %>%ungroup()# Load necessary librarieslibrary(ggplot2)library(rnaturalearth)library(sf)# Step 1: Get land polygons from rnaturalearthland <-ne_countries(scale ="medium", returnclass ="sf")# Step 2: Plot the mean SES with a diverging color scaleg_ses =ggplot() +geom_tile(data = mean_ses_per_cell, aes(x = lon, y = lat, fill = mean_SES)) +geom_sf(data = land, fill ="darkgrey", color =NA) +# Add land in dark greyscale_fill_gradient2(low ="#2166ac", mid ="#f0f0f0", high ="#b2182b", midpoint =0# Blue-White-Red gradient, with midpoint at 0 ) +coord_sf(xlim =c(-180, 180), ylim =c(-90, 90), expand =FALSE) +# Set global coordinatestheme_minimal() +theme(legend.position ="bottom", # Position the legend at the bottomlegend.title =element_text(hjust =0.5), # Center the legend titlelegend.key.width =unit(3, "cm"), # Adjust width of the legend barlegend.key.height =unit(0.5, "cm") # Adjust height of the legend bar ) +guides(fill =guide_colorbar(direction ="horizontal", # Make the legend horizontaltitle.position ="top", # Move the title to the top of the legendtitle.hjust =0.5# Center the title horizontally ) ) +labs(title ="Standardized Effect Size (SES) of Observed vs. Null no-take MPA Coverage",x ="Longitude",y ="Latitude",fill ="Mean SES")print(g_ses)
Key message: Northern Pacific, Northern Atlantic, Southern Pacific, Southern Atlantic and global coastal/continental sharks and rays are less protected by MPAs than expected under a random placement of MPAs. This is even more the case for no-take MPAs.
Relate to IUCN status
Relate the percentage of species range overlapped by MPAs to the IUCN Red List status with difference tests between categories
Code
IUCN <-readRDS(here::here("Data","GAP analyses","sharks_iucn_final.RDS"))# Remove underscores from species namesIUCN$species <-gsub("_", " ", IUCN$Species)results_IUCN=left_join(IUCN, results, by=c("species"))library(ggplot2)library(dplyr)# Convert iucn_GE to a factor for better plottingresults_IUCN$iucn_GE <-as.factor(results_IUCN$iucn_GE)results_IUCN <- results_IUCN %>%mutate(IUCN_status =case_when( iucn_GE ==0~"LC", iucn_GE ==1~"NT", iucn_GE ==2~"VU", iucn_GE ==3~"EN", iucn_GE ==4~"CR",TRUE~"Unknown" ))# Filter out the "Unknown" categoryresults_IUCN_filtered <- results_IUCN %>%filter(IUCN_status !="Unknown")# Define IUCN colors and orderiucn_colors <-c("CR"="#D81E05", # Red"EN"="#FC7F3F", # Orange"VU"="#FEC748", # Yellow"NT"="#58AFFF", # Light Blue"LC"="#38AB38"# Green)iucn_order <-c("LC", "NT", "VU", "EN", "CR")library(ggplot2)library(dplyr)library(dunn.test)# Perform Kruskal-Wallis testkruskal_result <-kruskal.test(percentage_in_ALL_MPA ~ IUCN_status, data = results_IUCN_filtered)# Perform Dunn's test for pairwise comparisons and capture the outputinvisible(capture.output( dunn_result <-dunn.test(results_IUCN_filtered$percentage_in_ALL_MPA, results_IUCN_filtered$IUCN_status, method ="bonferroni")))# Create a function to add significance levels to the plotadd_significance <-function(p.value) {if(p.value <=0.001) return("***")elseif(p.value <=0.01) return("**")elseif(p.value <=0.05) return("*")elsereturn("ns")}# Create a data frame with pairwise comparisons and their significancesignificant_comparisons <-data.frame(group1 =sapply(strsplit(dunn_result$comparisons, " - "), `[`, 1),group2 =sapply(strsplit(dunn_result$comparisons, " - "), `[`, 2),p.value = dunn_result$P.adjusted,stringsAsFactors =FALSE)# Add significance levels to the data framesignificant_comparisons$significance <-sapply(significant_comparisons$p.value, add_significance)# Filter out non-significant comparisonssignificant_comparisons <- significant_comparisons[significant_comparisons$significance !="ns", ]# Calculate x positions for the significance barsiucn_levels <-c("LC", "NT", "VU", "EN", "CR")x_positions <-seq_along(iucn_levels)names(x_positions) <- iucn_levelssignificant_comparisons$x1 <- x_positions[significant_comparisons$group1]significant_comparisons$x2 <- x_positions[significant_comparisons$group2]# Sort comparisons based on the difference between group indicessignificant_comparisons$group_diff <-abs(significant_comparisons$x2 - significant_comparisons$x1)significant_comparisons <- significant_comparisons[order(significant_comparisons$group_diff, decreasing =TRUE), ]# Calculate y positions for the sorted comparisonssignificant_comparisons$y.position <-seq(max(results_IUCN_filtered$percentage_in_ALL_MPA, na.rm =TRUE) +5, by =5, length.out =nrow(significant_comparisons))# Recreate the base violin plotviolin_plot <-ggplot(results_IUCN_filtered, aes(x =factor(IUCN_status, levels = iucn_order), y = percentage_in_ALL_MPA)) +geom_violin(aes(fill = IUCN_status, color = IUCN_status), trim =FALSE, alpha =0.5) +geom_jitter(width =0.1, size =0.5, alpha =0.5, color ="darkgray") +geom_boxplot(width =0.1, fill ="white", color ="black", outlier.shape =NA, alpha =0.8) +labs(x ="IUCN Status", y ="(%) Range within MPAs") +theme_minimal() +theme(plot.title =element_text(hjust =0.5), legend.position ="none") +scale_x_discrete(limits = iucn_order) +scale_fill_manual(values = iucn_colors) +scale_color_manual(values = iucn_colors)# Add significance bars and labelsviolin_plot_with_significance <- violin_plot +geom_segment(data = significant_comparisons,aes(x = x1, xend = x2, y = y.position, yend = y.position),inherit.aes =FALSE,color ="black", size =0.5) +geom_segment(data = significant_comparisons,aes(x = x1, xend = x1, y = y.position, yend = y.position -2),inherit.aes =FALSE,color ="black", size =0.5) +geom_segment(data = significant_comparisons,aes(x = x2, xend = x2, y = y.position, yend = y.position -2),inherit.aes =FALSE,color ="black", size =0.5) +geom_text(data = significant_comparisons,aes(x = (x1 + x2) /2, y = y.position, label = significance),inherit.aes =FALSE,vjust =-0.5, size =3)# Display the updated plotprint(violin_plot_with_significance)
# A tibble: 5 × 5
IUCN_status count mean median sd
<chr> <int> <dbl> <dbl> <dbl>
1 LC 514 3.63 0.961 7.07
2 NT 118 2.03 0.878 3.21
3 VU 179 2.07 0.932 4.04
4 EN 121 1.58 0.834 2.59
5 CR 83 1.41 0.826 1.88
Key message: Least concerned species are significantly more protected by all MPAs than any other IUCN Red List threat status category. We also note a similar relationship (LC vs VU, EN & CR) for no-take MPAs, however, after p-value adjustment for multiple comparison, differences were not significant for no-take MPAs.
Relate to species traits
Relate the percentage of species range overlapped by MPAs to the trait space of species. 1) Use Pearson correlation tests between the axes of the trait space and MPA coverage. 2) Predict relationships from a GAM into the kernel density gridded trait space.
Code
# Relate to species traits ----load(here::here("Data", "GAP analyses", "coords_1005.Rdata"))load(here::here("Data", "GAP analyses", "grids_commonspecies_corrected_021024.Rdata"))# Load the necessary librarylibrary(tibble)# Convert to tibble and add row names as the first columncoords <- tibble::rownames_to_column(coords, var ="Species")colnames(results)[1]="Species"results_coords=left_join(coords, results)library(Hmisc)library(dplyr)library(tidyr)# Calculate correlation matrix with p-valuescor_matrix_with_p <-rcorr(as.matrix(results_coords[, c("A1", "A2", "A3", "percentage_in_ALL_MPA", "percentage_in_NT_MPA")]))# Extract correlation coefficients and p-valuescor_coefficients <- cor_matrix_with_p$rp_values <- cor_matrix_with_p$P# Create a function to format the resultsformat_cor_p <-function(cor, p) {sprintf("%.3f (p = %.3f)", cor, p)}# Create a data frame with formatted resultsresult_df <-data.frame(Predictor =c("A1", "A2", "A3"),`percentage_in_ALL_MPA`=format_cor_p(cor_coefficients["percentage_in_ALL_MPA", c("A1", "A2", "A3")], p_values["percentage_in_ALL_MPA", c("A1", "A2", "A3")]),`percentage_in_NT_MPA`=format_cor_p(cor_coefficients["percentage_in_NT_MPA", c("A1", "A2", "A3")], p_values["percentage_in_NT_MPA", c("A1", "A2", "A3")]))library(knitr)library(kableExtra)format_correlation_table <-function(result_df) {kable(result_df, format ="html", escape =FALSE,col.names =c("Predictor", "ALL MPA", "NT MPA"),caption ="Pearson Correlation Results") %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed", "responsive"),full_width =FALSE) %>%column_spec(1, bold =TRUE) %>%add_header_above(c(" "=1, "Percentage in MPA"=2)) %>%footnote(general ="Values shown as: correlation coefficient (p-value)",general_title ="Note:")}# Usage:formatted_table <-format_correlation_table(result_df)formatted_table
library(gridExtra)# Scatter plot for A1 vs percentage in ALL MPAplot_A1 <-ggplot(results_coords, aes(x = A1, y = percentage_in_ALL_MPA)) +geom_point(alpha =0.6) +geom_smooth(method ="lm", color ="red", se =TRUE) +theme_minimal() +labs(title ="A1 vs Percentage in ALL MPA",x ="A1",y ="Percentage in ALL MPA")# Scatter plot for A2 vs percentage in ALL MPAplot_A2 <-ggplot(results_coords, aes(x = A2, y = percentage_in_ALL_MPA)) +geom_point(alpha =0.6) +geom_smooth(method ="lm", color ="blue", se =TRUE) +theme_minimal() +labs(title ="A2 vs Percentage in ALL MPA",x ="A2",y ="Percentage in ALL MPA")# Set a common aspect ratio for both plotsplot_A1 <- plot_A1 +theme(aspect.ratio =1)plot_A2 <- plot_A2 +theme(aspect.ratio =1)# Create the combined plotcombined_plot <-grid.arrange(plot_A1, plot_A2, ncol =2)
Code
#Create kernel density of the trait space # Load required packageslibrary(BAT)# Assuming 'coords' is your dataframe# Extract the first two columns (A1 and A2)load(here::here("Data", "GAP analyses", "coords_1005.Rdata"))trait_space <- coords[, 1:2]sp_df=grids.fd_new# Get the species names from the trait spacetrait_species <-rownames(coords)# Subset the sp_df to keep only the columns (species) found in the trait spacesp_df_filtered <- sp_df[, colnames(sp_df) %in% trait_species]# Replace all NA values with 0sp_df_filtered[is.na(sp_df_filtered)] <-0# Create a global community matrixglobal_comm <-matrix(1, nrow =1, ncol =ncol(sp_df_filtered))colnames(global_comm) <-colnames(sp_df_filtered)rownames(global_comm) <-"global"global_kernel <- BAT::kernel.build(comm = global_comm, trait = trait_space,method ="gaussian")
Building hypervolume 1 of 1
Code
# Extract coordinates (trait values)trait_coords <- global_kernel@Data# Extract random points and their corresponding density valuesrandom_points <- global_kernel@RandomPointsdensity_values <- global_kernel@ValueAtRandomPoints# Create a data frame for the density plotplot_data <-data.frame(A1 = random_points[,1],A2 = random_points[,2],Density = density_values)# Create a data frame for the original trait pointstrait_data <-data.frame(A1 = trait_coords[,1],A2 = trait_coords[,2])#Build the GAM library(mgcv)library(dplyr)# Prepare data for GAMgam_data <- results_coords %>%select(Species, A1, A2, percentage_in_ALL_MPA) %>%filter(!is.na(percentage_in_ALL_MPA)) # Remove any rows with NA in the response variable# Build GAMgam_model <-gam(percentage_in_ALL_MPA ~s(A1, A2), data = gam_data, method ="REML")summary(gam_model)
# Make predictionsplot_data$predicted <-predict(gam_model, newdata = plot_data, type ="response")library(ggplot2)library(viridis)# Plot predictionsgam_plot <-ggplot() +geom_point(data = plot_data, aes(x = A1, y = A2, color = predicted), alpha =0.5) +#geom_point(data = gam_data, aes(x = A1, y = A2), color = "red", size = 2) +scale_color_viridis_c(name ="Predicted % MPA Coverage") +theme_minimal() +labs(title ="GAM Predictions: Percentage in ALL MPA",x ="Trait Axis 1",y ="Trait Axis 2")print(gam_plot)
Code
# Create a smoother surface plotgam_density_plot <-ggplot(plot_data, aes(x = A1, y = A2, z = predicted)) +stat_summary_2d(fun = mean, bins =50) +scale_fill_viridis_c(name ="Predicted % MPA Coverage") +theme_minimal() +labs(title ="GAM Predictions: Percentage in ALL MPA",x ="Trait Axis 1",y ="Trait Axis 2")print(gam_density_plot)
Key message: There is a significant, relationship between the percentage of the range of sharks and rays covered by MPAs and axis 1 & 2 of the trait space. Species located in the bottom right corner of the trait space tend to be more protected, this relationship is highly significant, however, very week (R2 = 0.04, p < 0.001).
Relate to phylogeny
Relate the percentage of species range overlapped by MPAs to the phylogenetic tree of species, with tests of phylogenetic signal and plots of trees with the variable mapped onto the tree.
Code
# Relate to phylogeny ----# For percentage range in ALL MPAs library(phytools)library(here)library(dplyr)library(purrr)library(ape)library(stringr)# Load the list of treesload(here::here("Data", "GAP analyses", "list_tree_sharks_p.Rdata"))# Function to modify species namesmodify_species_name <-function(name) {str_replace(name, " ", "_")}# Modify species names in the results dataframeresults_IUCN_filtered <- results_IUCN_filtered %>%mutate(species_modified =modify_species_name(species))compute_phylo_signal <-function(tree, data) {# Ensure the data is in the same order as the tree tips and remove NAs matched_data <- data %>%filter(species_modified %in% tree$tip.label) %>%filter(!is.na(percentage_in_ALL_MPA)) %>%arrange(match(species_modified, tree$tip.label)) n_species_after_matching <-nrow(matched_data)# Prune the tree to match the available data pruned_tree <-keep.tip(tree, matched_data$species_modified) n_tips_pruned_tree <-length(pruned_tree$tip.label)# Compute phylogenetic signal using Pagel's lambda, capturing any output signal <-tryCatch({ captured_output <-capture.output({ result <-phylosig(pruned_tree, matched_data$percentage_in_ALL_MPA, method ="lambda", test =TRUE) }) result$captured_output <- captured_output result }, error =function(e) { error_message <-paste("Error in phylosig:", e$message)return(list(lambda =NA, P =NA, error = error_message, captured_output =NA)) })# Return all informationreturn(list(lambda = signal$lambda,p_value = signal$P,n_species = n_species_after_matching,n_tips_pruned_tree = n_tips_pruned_tree,error =if(!is.null(signal$error)) signal$error elseNA,captured_output =paste(signal$captured_output, collapse ="\n") ))}# Apply the function to all trees: using the first two only for the moment to save timeresults <-map_dfr(list_tree_sharks_p[1:2], ~compute_phylo_signal(.x, results_IUCN_filtered), .id ="tree")# Now, create the kable separatelylibrary(knitr)library(dplyr)library(kableExtra)results %>%select(1:4) %>%kable(caption ="Phylogenetic Signal Results for all MPAs",col.names =c("Tree", "Lambda", "P-value", "Number of Species"),digits =3,align =c('l', 'c', 'c', 'c')) %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed", "responsive"))
Phylogenetic Signal Results for all MPAs
Tree
Lambda
P-value
Number of Species
1
0.121
0.001
972
2
0.170
0.000
972
Code
# Compute summary statisticssummary_stats <- results %>%summarise(mean_lambda =mean(lambda, na.rm =TRUE),median_lambda =median(lambda, na.rm =TRUE),sd_lambda =sd(lambda, na.rm =TRUE),mean_p =mean(p_value, na.rm =TRUE),median_p =median(p_value, na.rm =TRUE),sd_p =sd(p_value, na.rm =TRUE),mean_n_species =mean(n_species, na.rm =TRUE),min_n_species =min(n_species, na.rm =TRUE),max_n_species =max(n_species, na.rm =TRUE) )#print(summary_stats)# For percentage range in no-take MPAs library(phytools)library(here)library(dplyr)library(purrr)library(ape)library(stringr)# Load the list of treesload(here::here("Data", "GAP analyses", "list_tree_sharks_p.Rdata"))# Function to modify species namesmodify_species_name <-function(name) {str_replace(name, " ", "_")}# Modify species names in the results dataframeresults_IUCN_filtered <- results_IUCN_filtered %>%mutate(species_modified =modify_species_name(species))compute_phylo_signal <-function(tree, data) {# Ensure the data is in the same order as the tree tips and remove NAs matched_data <- data %>%filter(species_modified %in% tree$tip.label) %>%filter(!is.na(percentage_in_NT_MPA)) %>%arrange(match(species_modified, tree$tip.label)) n_species_after_matching <-nrow(matched_data)# Prune the tree to match the available data pruned_tree <-keep.tip(tree, matched_data$species_modified) n_tips_pruned_tree <-length(pruned_tree$tip.label)# Compute phylogenetic signal using Pagel's lambda, capturing any output signal <-tryCatch({ captured_output <-capture.output({ result <-phylosig(pruned_tree, matched_data$percentage_in_NT_MPA, method ="lambda", test =TRUE) }) result$captured_output <- captured_output result }, error =function(e) { error_message <-paste("Error in phylosig:", e$message)return(list(lambda =NA, P =NA, error = error_message, captured_output =NA)) })# Return all informationreturn(list(lambda = signal$lambda,p_value = signal$P,n_species = n_species_after_matching,n_tips_pruned_tree = n_tips_pruned_tree,error =if(!is.null(signal$error)) signal$error elseNA,captured_output =paste(signal$captured_output, collapse ="\n") ))}# Apply the function to all trees: use only the first two trees for the moment to save timeresults <-map_dfr(list_tree_sharks_p[1:2], ~compute_phylo_signal(.x, results_IUCN_filtered), .id ="tree")# Now, create the kable separatelylibrary(knitr)library(dplyr)library(kableExtra)results %>%select(1:4) %>%kable(caption ="Phylogenetic Signal Results for no-take MPAs",col.names =c("Tree", "Lambda", "P-value", "Number of Species"),digits =3,align =c('l', 'c', 'c', 'c')) %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed", "responsive"))
Phylogenetic Signal Results for no-take MPAs
Tree
Lambda
P-value
Number of Species
1
0.046
0.056
972
2
0.041
0.075
972
Code
# Compute summary statisticssummary_stats <- results %>%summarise(mean_lambda =mean(lambda, na.rm =TRUE),median_lambda =median(lambda, na.rm =TRUE),sd_lambda =sd(lambda, na.rm =TRUE),mean_p =mean(p_value, na.rm =TRUE),median_p =median(p_value, na.rm =TRUE),sd_p =sd(p_value, na.rm =TRUE),mean_n_species =mean(n_species, na.rm =TRUE),min_n_species =min(n_species, na.rm =TRUE),max_n_species =max(n_species, na.rm =TRUE) )#print(summary_stats)#Plot the tree with color gradient on terminal branches library(ggtree)library(ggplot2)library(dplyr)library(RColorBrewer)library(tidytree)# Function to get a color paletteget_color_palette <-function(n) {colorRampPalette(brewer.pal(9, "YlGnBu"))(n) # Changed to YlGnBu}# All MPAs # Function to plot the circular treeplot_circular_colored_tree <-function(tree, data) {# Ensure the data is in the same order as the tree tips and remove NAs matched_data <- data %>%filter(species_modified %in% tree$tip.label) %>%filter(!is.na(percentage_in_ALL_MPA)) %>%arrange(match(species_modified, tree$tip.label))# Print summary of percentage_in_ALL_MPA for debugging#print(summary(matched_data$percentage_in_ALL_MPA))# Prune the tree to match the available data pruned_tree <- ape::keep.tip(tree, matched_data$species_modified)# Create color palette n_colors <-100 color_palette <-get_color_palette(n_colors)# Convert tree to tidy format and add percentage data tree_data <-as_tibble(pruned_tree) %>%left_join(matched_data, by =c("label"="species_modified"))# Plot the circular tree p <-ggtree(pruned_tree, layout="circular", aes(color=percentage_in_ALL_MPA), size =0.3) %<+% tree_data# Color the branches p <- p +scale_color_gradientn(colours = color_palette, name ="% Range within all MPAs",limits =c(0, 100),na.value ="grey50") +theme(legend.position ="right")# Remove default labels and add a title p <- p +theme(plot.title =element_text(hjust =0.5)) return(p)}# Plot the first treetree_plot_ALL <-plot_circular_colored_tree(list_tree_sharks_p[[1]], results_IUCN_filtered)# Display the plotprint(tree_plot_ALL)
Code
# No-take MPAs # Function to plot the circular treeplot_circular_colored_tree <-function(tree, data) {# Ensure the data is in the same order as the tree tips and remove NAs matched_data <- data %>%filter(species_modified %in% tree$tip.label) %>%filter(!is.na(percentage_in_NT_MPA)) %>%arrange(match(species_modified, tree$tip.label))# Print summary of percentage_in_ALL_MPA for debugging#print(summary(matched_data$percentage_in_NT_MPA))# Prune the tree to match the available data pruned_tree <- ape::keep.tip(tree, matched_data$species_modified)# Create color palette n_colors <-100 color_palette <-get_color_palette(n_colors)# Convert tree to tidy format and add percentage data tree_data <-as_tibble(pruned_tree) %>%left_join(matched_data, by =c("label"="species_modified"))# Plot the circular tree p <-ggtree(pruned_tree, layout="circular", aes(color=percentage_in_NT_MPA), size =0.3) %<+% tree_data# Color the branches p <- p +scale_color_gradientn(colours = color_palette, name ="% Range within no-take MPAs",limits =c(0, 100),na.value ="grey50") +theme(legend.position ="right")# Remove default labels and add a title p <- p +theme(plot.title =element_text(hjust =0.5)) return(p)}# Plot the first treetree_plot_NT <-plot_circular_colored_tree(list_tree_sharks_p[[1]], results_IUCN_filtered)# Display the plotprint(tree_plot_NT)
Key message: There are significant phylogenetic signals with the percentage of MPA coverage for all MPAs (lambda= 0.12-0.17; < 0.05), and for no-take MPAs, this relationship is almost significant (lambda = 0.04; 0.08 > p > 0.05). However, patterns are not obvious on the trees. Analyses were ran for two trees only but can be replicated to all 100 trees.
Relate to FUSE and EDGE2
Relate the percentage of species range overlapped by MPAs to FUSE and EDGE2
Code
library(jsonlite)library(here)library(ggplot2)library(dplyr)library(purrr)library(patchwork)library(knitr)library(kableExtra)# Read the JSON filedata <-fromJSON(here("Data", "shark_conservation_metrics_no_freshwater.json"))# Function to process data and create plotprocess_and_plot <-function(metric_name) {# Merge and process data combined_data <- data[[metric_name]]$info %>%left_join(results_coords, by ="Species") %>%mutate(across(where(is.list), unlist),across(where(is.character), as.factor),!!metric_name :=pmin(pmax(!!sym(metric_name), 0), 1))# Create plot plot <-ggplot(combined_data, aes_string(x = metric_name, y ="percentage_in_ALL_MPA")) +geom_point(alpha =0.6) +geom_smooth(method ="lm", color ="red", se =TRUE) +theme_minimal() +labs(title =paste(metric_name, "vs % Range Protected"),x = metric_name,y ="% Range Protected") +scale_x_continuous(limits =c(0, 1), breaks =seq(0, 1, 0.2)) +scale_y_continuous(limits =c(0, 100), breaks =seq(0, 100, 20)) +theme(aspect.ratio =1)# Calculate correlation cor_test <-cor.test(combined_data[[metric_name]], combined_data$percentage_in_ALL_MPA, use ="complete.obs")return(list(plot = plot, cor_test = cor_test))}# Process and plot for EDGE2 and FUSEedge2_results <-process_and_plot("EDGE2") fuse_results <-process_and_plot("FUSE") # Combine plots in a grid with 2 columnscombined_plot <- edge2_results$plot + fuse_results$plot +plot_layout(ncol =2) +plot_annotation(title =NULL,theme =theme(plot.title =element_text(hjust =0.5, size =16)) )# Print the combined plotprint(combined_plot)
Code
# Create a data frame with correlation test resultscorrelation_results <-data.frame(Metric =c("EDGE2", "FUSE"),Correlation =c(edge2_results$cor_test$estimate, fuse_results$cor_test$estimate),P_value =c(edge2_results$cor_test$p.value, fuse_results$cor_test$p.value))# Create and print the kable tablekable_table <-kable(correlation_results, col.names =c("Metric", "Correlation", "P-value"),digits =c(0, 3, 4),caption ="Correlation Test Results") %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed"),full_width =FALSE)kable_table
Correlation Test Results
Metric
Correlation
P-value
EDGE2
-0.119
2e-04
FUSE
-0.125
1e-04
Key message: There is significant negative relationship between EDGE2 and FUSE with the % of range protected. Species with high FUSE or high EDGE2 tend to be less protected than those with low EDGE2 or FUSE.
Source Code
---title: "GAP analyses"author: "Théophile L. Mouton"date: "October 9, 2024"format: html: toc: true toc-location: right css: custom.css output-file: "GAP_analyses_EEZs.html" self-contained: true code-fold: true code-tools: trueeditor: visualexecute: warning: false message: false echo: true---# Load the dataLoad the grid and the MPA raster layers```{r}# Load necessary librarieslibrary(raster)library(here)library(sp)library(dplyr)library(tidyr)# Load the data ----load(here::here("Data", "GAP analyses", "puvsp_marine.Rdata"))# Load the raster layersmpa_ALL <- raster::raster(here::here("Data", "GAP analyses", "mpa_ALL_binary.tif"))mpa_NT <- raster::raster(here::here("Data", "GAP analyses", "mpa_NT_binary.tif"))```# GAP analysesGAP analyses of shark and ray range overlaps with Marine Protected Areas```{r}# Convert species dataframe to spatial pointsspecies_points <-SpatialPointsDataFrame(coords = puvsp_marine[, c("lon", "lat")], data = puvsp_marine, proj4string =CRS(projection(mpa_ALL)))# Extract MPA values for each species pointmpa_ALL_values <- raster::extract(mpa_ALL, species_points)mpa_NT_values <- raster::extract(mpa_NT, species_points)# Add MPA values to the species dataframepuvsp_marine$mpa_ALL_present <- mpa_ALL_valuespuvsp_marine$mpa_NT_present <- mpa_NT_values# Function to calculate percentage of range in MPAcalculate_mpa_percentage <-function(species_column, mpa_column) { species_presence <- species_column ==1# Assuming 1 indicates species presence total_range <-sum(species_presence, na.rm =TRUE) range_in_mpa <-sum(species_presence & mpa_column ==1, na.rm =TRUE)if (total_range ==0) {return(NA) # Return NA if the species is not present anywhere } percentage <- (range_in_mpa / total_range) *100return(percentage)}# Apply function to all species columns for both MPA typesspecies_columns <-4:(ncol(puvsp_marine) -2) # Assuming species columns start at 4mpa_ALL_percentages <-sapply(puvsp_marine[, species_columns], function(x) calculate_mpa_percentage(x, puvsp_marine$mpa_ALL_present))mpa_NT_percentages <-sapply(puvsp_marine[, species_columns], function(x) calculate_mpa_percentage(x, puvsp_marine$mpa_NT_present))# Create a dataframe with resultsresults <-data.frame(species =names(mpa_ALL_percentages),percentage_in_ALL_MPA = mpa_ALL_percentages,percentage_in_NT_MPA = mpa_NT_percentages)# Remove any NA resultsresults <- results[!is.na(results$percentage_in_ALL_MPA) &!is.na(results$percentage_in_NT_MPA), ]# Sort results by percentage in ALL MPAs (descending)results <- results[order(-results$percentage_in_ALL_MPA), ]# Display top 10 species#print(head(results, 10))# Summary statisticscat("\nSummary Statistics for ALL MPAs:\n")print(summary(results$percentage_in_ALL_MPA))cat("\nSummary Statistics for No-Take MPAs:\n")print(summary(results$percentage_in_NT_MPA))# Save results to CSV#write.csv(results, "species_mpa_coverage_ALL_and_NT.csv", row.names = FALSE)```# Null model of MPA placement within EEZsNull model of MPA placement and overlaps with the range of sharks and rays```{r}library(raster)library(here)library(sp)library(sf)library(parallel)library(pbapply)# Load the bathymetry rasterBathy <-raster(here::here("Data", "bathymetry-0.1deg-adjusted.tif"))# Create marine mask from bathymetry (areas < 0)marine_mask <- Bathy <0# Resample marine mask to match MPA raster resolutionmarine_mask_resampled <- raster::resample(marine_mask, mpa_ALL, method ="ngb")# Ensure marine_mask_resampled is binary (0 or 1)marine_mask_resampled <- raster::reclassify(marine_mask_resampled, c(-Inf, 0.5, 0, 0.5, Inf, 1))# Load the EEZ shapefileeez <-st_read(here("Data", "World_EEZ_v12_20231025", "eez_v12.shp"), quiet =TRUE)# Function to create a raster mask of EEZs with unique values for each countrycreate_eez_mask <-function(template_raster) {# Ensure EEZ has a unique identifier for each country eez$country_id <-as.numeric(as.factor(eez$SOVEREIGN1))# Rasterize the EEZ shapefile using the country_id eez_raster <-rasterize(eez, template_raster, field ="country_id")return(eez_raster)}# Updated create_random_mpa function to randomize within each country's EEZcreate_random_mpa <-function(template_raster, marine_mask, eez_mask) {# Ensure template_raster is binary template_raster <- raster::reclassify(template_raster, c(-Inf, 0.5, 0, 0.5, Inf, 1))# Combine marine_mask and eez_mask combined_mask <- marine_mask * (eez_mask >0)# Get unique country IDs country_ids <-unique(eez_mask[!is.na(eez_mask)]) country_ids <- country_ids[country_ids >0]# Initialize random raster random_raster <- template_raster random_raster[] <-NAfor (country_id in country_ids) {# Create mask for current country country_mask <- eez_mask == country_id# Count original marine MPA cells within current country's EEZ original_mpa_cells <-sum(template_raster[] ==1& country_mask[] &!is.na(combined_mask[]), na.rm =TRUE)# Get all valid cells for current country (marine areas within EEZ) valid_cells <-which(country_mask[] &!is.na(combined_mask[])) total_valid_cells <-length(valid_cells)if (original_mpa_cells >0&& total_valid_cells >0) {if (original_mpa_cells > total_valid_cells) {warning(paste("More MPA cells than valid marine cells for country ID", country_id, ". Adjusting MPA cell count.")) original_mpa_cells <- total_valid_cells }# Randomly select valid cells to be MPAs for current country mpa_cells <-sample(valid_cells, size =min(original_mpa_cells, total_valid_cells), replace =FALSE)# Update random raster random_raster[valid_cells] <-0 random_raster[mpa_cells] <-1 } }return(random_raster)}# Calculate MPA percentage functioncalculate_mpa_percentage <-function(species_values, mpa_values) { total_cells <-sum(!is.na(species_values)) mpa_cells <-sum(mpa_values ==1&!is.na(species_values), na.rm =TRUE) percentage <- (mpa_cells / total_cells) *100return(percentage)}# Main analysis functionrun_random_mpa_analysis <-function(species_data, mpa_raster, marine_mask, eez_mask, n_iterations =100) {# Convert species dataframe to spatial points species_points <- sp::SpatialPointsDataFrame(coords = species_data[, c("lon", "lat")], data = species_data, proj4string = sp::CRS(raster::projection(mpa_raster)))# Prepare results storage species_columns <-4:(ncol(species_data) -1) # Adjust if needed all_results <-matrix(nrow =length(species_columns), ncol = n_iterations)rownames(all_results) <-names(species_data)[species_columns]# Use pblapply for parallel processing with a progress bar all_results <-pblapply(1:n_iterations, function(i) {set.seed(i) # Set seed for reproducibility random_mpa <-create_random_mpa(mpa_raster, marine_mask, eez_mask) random_mpa_values <- raster::extract(random_mpa, species_points)sapply(species_data[, species_columns], function(x) calculate_mpa_percentage(x, random_mpa_values)) }, cl =detectCores() -1) # Use one less than available cores all_results <-do.call(cbind, all_results)# Calculate mean and standard deviation for each species mean_percentages <-rowMeans(all_results, na.rm =TRUE) sd_percentages <-apply(all_results, 1, sd, na.rm =TRUE) results <-data.frame(species =rownames(all_results),mean_percentage = mean_percentages,sd_percentage = sd_percentages )return(results)}# Function to process resultsprocess_results <-function(results, mpa_type) {# Sort results by mean_percentage in descending order results <- results[order(-results$mean_percentage), ]# Calculate summary statistics summary_stats <-summary(results$mean_percentage)# Write results to CSVwrite.csv(results, here::here("Outputs",paste0("species_random_", mpa_type, "_coverage.csv")), row.names =FALSE)# Return a list containing the processed datareturn(list(top_10 =head(results, 10),summary_stats = summary_stats,all_results = results ))}# Create the EEZ mask with unique country IDseez_mask <-create_eez_mask(mpa_ALL)# Run the analysis with the country-specific EEZ constrainttryCatch({ random_results_ALL <-run_random_mpa_analysis(puvsp_marine, mpa_ALL, marine_mask_resampled, eez_mask, n_iterations =100)print("Analysis for ALL MPAs completed successfully.")}, error =function(e) {print(paste("Error in ALL MPAs analysis:", e$message))})tryCatch({ random_results_NT <-run_random_mpa_analysis(puvsp_marine, mpa_NT, marine_mask_resampled, eez_mask, n_iterations =100)print("Analysis for No-Take MPAs completed successfully.")}, error =function(e) {print(paste("Error in No-Take MPAs analysis:", e$message))})# Process results for both MPA typesprocessed_results <-list()if (exists("random_results_ALL")) { processed_results[["ALL MPAs"]] <-process_results(random_results_ALL, "ALL MPAs")}if (exists("random_results_NT")) { processed_results[["No-take MPAs"]] <-process_results(random_results_NT, "No-take MPAs")}```**Important note:** This NULL model randomly distributes protected grid cells at a 0.5-degree resolution within country's EEZs. However, it does not preserve MPA shape.# Compare resultsCompare results between the MPA network and the Null model of MPA placement```{r}# Compare results ---- library(dplyr)library(tidyr)library(ggplot2)# Load actual MPA coverage dataactual_coverage <-read.csv(here::here("Outputs", "species_mpa_coverage_ALL_and_NT.csv"))colnames(actual_coverage)[2:3]=c("ALL","NT")# Load random MPA coverage resultsrandom_ALL <-read.csv(here::here("Outputs", "species_random_ALL MPAs_coverage.csv"))random_NT <-read.csv(here::here("Outputs", "species_random_No-take MPAs_coverage.csv"))# Reshape actual coverage data to long formatactual_long <- actual_coverage %>%pivot_longer(cols =c(ALL, NT), names_to ="mpa_type", values_to ="actual_percentage")# Function to merge and compare actual vs random datacompare_coverage <-function(actual_data, random_data, mpa_type) { comparison <- actual_data %>%filter(mpa_type ==!!mpa_type) %>%left_join(random_data, by ="species") %>%mutate(difference = actual_percentage - mean_percentage,z_score = (actual_percentage - mean_percentage) / sd_percentage)return(comparison)}# Create comparison dataframescomparison_ALL <-compare_coverage(actual_long, random_ALL, "ALL")comparison_NT <-compare_coverage(actual_long, random_NT, "NT")# Function to summarize and print resultslibrary(knitr)library(kableExtra)library(knitr)library(kableExtra)summarize_results <-function(comparison_data, mpa_type) { over_represented <-mean(comparison_data$difference >0, na.rm=TRUE) *100 under_represented <-mean(comparison_data$difference <0, na.rm=TRUE) *100 equally_represented <-100- over_represented - under_represented summary_df <-data.frame(Representation =c("Over-represented", "Under-represented", "Equally represented"),Percentage =round(c(over_represented, under_represented, equally_represented), 2) )kable(summary_df, format ="html", col.names =c("Representation", "Percentage (%)"),caption =paste("Summary for", mpa_type, "MPAs")) %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed", "responsive"),full_width =FALSE) %>%column_spec(2, width ="100px") %>%row_spec(0, bold =TRUE) %>%add_header_above(c(" "=1, "Species"=1)) %>%footnote(general ="Percentages may not sum to 100% due to rounding.")}# Summarize resultssummarize_results(comparison_ALL, "ALL")summarize_results(comparison_NT, "No-Take")# Identify significantly over/under-represented specieslibrary(dplyr)library(knitr)library(kableExtra)library(dplyr)library(knitr)library(kableExtra)significant_species <-function(comparison_data, mpa_type, z_threshold =1.96) { over_represented <- comparison_data %>%filter(z_score > z_threshold) %>%arrange(desc(z_score)) under_represented <- comparison_data %>%filter(z_score <-z_threshold) %>%arrange(z_score) top_5_over <- over_represented %>%select(species, actual_percentage, mean_percentage, difference, z_score) %>%head(5) top_5_under <- under_represented %>%select(species, actual_percentage, mean_percentage, difference, z_score) %>%head(5) combined_table <-bind_rows(mutate(top_5_over, representation ="Over-represented"),mutate(top_5_under, representation ="Under-represented") ) %>%mutate(across(where(is.numeric), ~round(., 2))) kable_output <-kable(combined_table, format ="html", col.names =c("Species", "Actual %", "Random %", "Difference", "Z-score", "Representation"),caption =paste("Top 5 Significantly Over/Under-represented Species in", mpa_type, "MPAs")) %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed", "responsive"),full_width =FALSE) %>%column_spec(1, width ="200px") %>%column_spec(2:5, width ="100px") %>%column_spec(6, width ="150px") %>%row_spec(0, bold =TRUE) %>%pack_rows("Over-represented", 1, 5, label_row_css ="background-color: #e6f3ff; color: #000;") %>%pack_rows("Under-represented", 6, 10, label_row_css ="background-color: #fff0e6; color: #000;") footnote_text <-paste("Total significantly over-represented species:", nrow(over_represented), "\n","Total significantly under-represented species:", nrow(under_represented) ) kable_output %>%footnote(general = footnote_text, general_title ="Note:")}# Identify significant speciessignificant_species(comparison_ALL, "ALL")significant_species(comparison_NT, "No-Take")```**Key message:** \>60% of species are under-represented by the global MPA network (i.e. less protected by the current MPA network than by a random placement of MPAs).# Map results of the GAP analysesMap Standardised Effect Sizes of MPA coverage```{r}#For ALL MPAs difference_sp=comparison_ALL[,c(1,3,6)]#Plot the difference # Load necessary librarieslibrary(dplyr)library(tidyr)library(ggplot2)#SES:# Step 1: Calculate SES for each species in comparison_ALLcomparison_ALL <- comparison_ALL %>%mutate(SES = (actual_percentage - mean_percentage) / sd_percentage)# Assuming you have a similar species-to-grid-cell mapping like puvsp_marine# Reshape puvsp_marine from wide to long format (if needed)puvsp_long <- puvsp_marine %>%pivot_longer(cols =-c(id, lon, lat), names_to ="species", values_to ="presence") %>%filter(!is.na(presence) & presence ==1) # Only keep rows where species is present# Join SES data (comparison_ALL) to puvsp_longcombined_data <- puvsp_long %>%left_join(comparison_ALL, by =c("species"="species"))# Step 3: Group by grid cell (id, lon, lat) and calculate mean SESmean_ses_per_cell <- combined_data %>%group_by(id, lon, lat) %>%summarise(mean_SES =mean(SES, na.rm =TRUE)) %>%ungroup()# Load necessary librarieslibrary(ggplot2)library(rnaturalearth)library(sf)# Step 1: Get land polygons from rnaturalearthland <-ne_countries(scale ="medium", returnclass ="sf")# Step 2: Plot the mean SES with a diverging color scaleg_ses =ggplot() +geom_tile(data = mean_ses_per_cell, aes(x = lon, y = lat, fill = mean_SES)) +geom_sf(data = land, fill ="darkgrey", color =NA) +# Add land in dark greyscale_fill_gradient2(low ="#2166ac", mid ="#f0f0f0", high ="#b2182b", midpoint =0# Blue-White-Red gradient, with midpoint at 0 ) +coord_sf(xlim =c(-180, 180), ylim =c(-90, 90), expand =FALSE) +# Set global coordinatestheme_minimal() +theme(legend.position ="bottom", # Position the legend at the bottomlegend.title =element_text(hjust =0.5), # Center the legend titlelegend.key.width =unit(3, "cm"), # Adjust width of the legend barlegend.key.height =unit(0.5, "cm") # Adjust height of the legend bar ) +guides(fill =guide_colorbar(direction ="horizontal", # Make the legend horizontaltitle.position ="top", # Move the title to the top of the legendtitle.hjust =0.5# Center the title horizontally ) ) +labs(title ="Standardized Effect Size (SES) of Observed vs. Null MPA Coverage",x ="Longitude",y ="Latitude",fill ="Mean SES")print(g_ses)#For No-take MPAs difference_sp=comparison_NT[,c(1,3,6)]#SES# Step 1: Calculate SES for each species in comparison_ALLcomparison_NT <- comparison_NT %>%mutate(SES = (actual_percentage - mean_percentage) / sd_percentage)# Assuming you have a similar species-to-grid-cell mapping like puvsp_marine# Reshape puvsp_marine from wide to long format (if needed)puvsp_long <- puvsp_marine %>%pivot_longer(cols =-c(id, lon, lat), names_to ="species", values_to ="presence") %>%filter(!is.na(presence) & presence ==1) # Only keep rows where species is present# Join SES data (comparison_ALL) to puvsp_longcombined_data <- puvsp_long %>%left_join(comparison_NT, by =c("species"="species"))# Step 3: Group by grid cell (id, lon, lat) and calculate mean SESmean_ses_per_cell <- combined_data %>%group_by(id, lon, lat) %>%summarise(mean_SES =mean(SES, na.rm =TRUE)) %>%ungroup()# Load necessary librarieslibrary(ggplot2)library(rnaturalearth)library(sf)# Step 1: Get land polygons from rnaturalearthland <-ne_countries(scale ="medium", returnclass ="sf")# Step 2: Plot the mean SES with a diverging color scaleg_ses =ggplot() +geom_tile(data = mean_ses_per_cell, aes(x = lon, y = lat, fill = mean_SES)) +geom_sf(data = land, fill ="darkgrey", color =NA) +# Add land in dark greyscale_fill_gradient2(low ="#2166ac", mid ="#f0f0f0", high ="#b2182b", midpoint =0# Blue-White-Red gradient, with midpoint at 0 ) +coord_sf(xlim =c(-180, 180), ylim =c(-90, 90), expand =FALSE) +# Set global coordinatestheme_minimal() +theme(legend.position ="bottom", # Position the legend at the bottomlegend.title =element_text(hjust =0.5), # Center the legend titlelegend.key.width =unit(3, "cm"), # Adjust width of the legend barlegend.key.height =unit(0.5, "cm") # Adjust height of the legend bar ) +guides(fill =guide_colorbar(direction ="horizontal", # Make the legend horizontaltitle.position ="top", # Move the title to the top of the legendtitle.hjust =0.5# Center the title horizontally ) ) +labs(title ="Standardized Effect Size (SES) of Observed vs. Null no-take MPA Coverage",x ="Longitude",y ="Latitude",fill ="Mean SES")print(g_ses)```**Key message:** Northern Pacific, Northern Atlantic, Southern Pacific, Southern Atlantic and global coastal/continental sharks and rays are less protected by MPAs than expected under a random placement of MPAs. This is even more the case for no-take MPAs. # Relate to IUCN status1. Relate the percentage of species range overlapped by MPAs to the IUCN Red List status with difference tests between categories```{r}IUCN <-readRDS(here::here("Data","GAP analyses","sharks_iucn_final.RDS"))# Remove underscores from species namesIUCN$species <-gsub("_", " ", IUCN$Species)results_IUCN=left_join(IUCN, results, by=c("species"))library(ggplot2)library(dplyr)# Convert iucn_GE to a factor for better plottingresults_IUCN$iucn_GE <-as.factor(results_IUCN$iucn_GE)results_IUCN <- results_IUCN %>%mutate(IUCN_status =case_when( iucn_GE ==0~"LC", iucn_GE ==1~"NT", iucn_GE ==2~"VU", iucn_GE ==3~"EN", iucn_GE ==4~"CR",TRUE~"Unknown" ))# Filter out the "Unknown" categoryresults_IUCN_filtered <- results_IUCN %>%filter(IUCN_status !="Unknown")# Define IUCN colors and orderiucn_colors <-c("CR"="#D81E05", # Red"EN"="#FC7F3F", # Orange"VU"="#FEC748", # Yellow"NT"="#58AFFF", # Light Blue"LC"="#38AB38"# Green)iucn_order <-c("LC", "NT", "VU", "EN", "CR")library(ggplot2)library(dplyr)library(dunn.test)# Perform Kruskal-Wallis testkruskal_result <-kruskal.test(percentage_in_ALL_MPA ~ IUCN_status, data = results_IUCN_filtered)# Perform Dunn's test for pairwise comparisons and capture the outputinvisible(capture.output( dunn_result <-dunn.test(results_IUCN_filtered$percentage_in_ALL_MPA, results_IUCN_filtered$IUCN_status, method ="bonferroni")))# Create a function to add significance levels to the plotadd_significance <-function(p.value) {if(p.value <=0.001) return("***")elseif(p.value <=0.01) return("**")elseif(p.value <=0.05) return("*")elsereturn("ns")}# Create a data frame with pairwise comparisons and their significancesignificant_comparisons <-data.frame(group1 =sapply(strsplit(dunn_result$comparisons, " - "), `[`, 1),group2 =sapply(strsplit(dunn_result$comparisons, " - "), `[`, 2),p.value = dunn_result$P.adjusted,stringsAsFactors =FALSE)# Add significance levels to the data framesignificant_comparisons$significance <-sapply(significant_comparisons$p.value, add_significance)# Filter out non-significant comparisonssignificant_comparisons <- significant_comparisons[significant_comparisons$significance !="ns", ]# Calculate x positions for the significance barsiucn_levels <-c("LC", "NT", "VU", "EN", "CR")x_positions <-seq_along(iucn_levels)names(x_positions) <- iucn_levelssignificant_comparisons$x1 <- x_positions[significant_comparisons$group1]significant_comparisons$x2 <- x_positions[significant_comparisons$group2]# Sort comparisons based on the difference between group indicessignificant_comparisons$group_diff <-abs(significant_comparisons$x2 - significant_comparisons$x1)significant_comparisons <- significant_comparisons[order(significant_comparisons$group_diff, decreasing =TRUE), ]# Calculate y positions for the sorted comparisonssignificant_comparisons$y.position <-seq(max(results_IUCN_filtered$percentage_in_ALL_MPA, na.rm =TRUE) +5, by =5, length.out =nrow(significant_comparisons))# Recreate the base violin plotviolin_plot <-ggplot(results_IUCN_filtered, aes(x =factor(IUCN_status, levels = iucn_order), y = percentage_in_ALL_MPA)) +geom_violin(aes(fill = IUCN_status, color = IUCN_status), trim =FALSE, alpha =0.5) +geom_jitter(width =0.1, size =0.5, alpha =0.5, color ="darkgray") +geom_boxplot(width =0.1, fill ="white", color ="black", outlier.shape =NA, alpha =0.8) +labs(x ="IUCN Status", y ="(%) Range within MPAs") +theme_minimal() +theme(plot.title =element_text(hjust =0.5), legend.position ="none") +scale_x_discrete(limits = iucn_order) +scale_fill_manual(values = iucn_colors) +scale_color_manual(values = iucn_colors)# Add significance bars and labelsviolin_plot_with_significance <- violin_plot +geom_segment(data = significant_comparisons,aes(x = x1, xend = x2, y = y.position, yend = y.position),inherit.aes =FALSE,color ="black", size =0.5) +geom_segment(data = significant_comparisons,aes(x = x1, xend = x1, y = y.position, yend = y.position -2),inherit.aes =FALSE,color ="black", size =0.5) +geom_segment(data = significant_comparisons,aes(x = x2, xend = x2, y = y.position, yend = y.position -2),inherit.aes =FALSE,color ="black", size =0.5) +geom_text(data = significant_comparisons,aes(x = (x1 + x2) /2, y = y.position, label = significance),inherit.aes =FALSE,vjust =-0.5, size =3)# Display the updated plotprint(violin_plot_with_significance)# Save the plot#ggsave("violin_plot_with_significance.pdf", violin_plot_with_significance, width = 10, height = 8, units = "in")# Summary statisticssummary_stats <- results_IUCN_filtered %>%group_by(IUCN_status) %>%summarise(count =n(),mean =mean(percentage_in_ALL_MPA, na.rm =TRUE),median =median(percentage_in_ALL_MPA, na.rm =TRUE),sd =sd(percentage_in_ALL_MPA, na.rm =TRUE) ) %>%arrange(factor(IUCN_status, levels = iucn_order))print(summary_stats)#Now for no-take MPAs kruskal_result <-kruskal.test(sqrt(percentage_in_NT_MPA) ~ IUCN_status, data = results_IUCN_filtered)# Perform Dunn's test for pairwise comparisons# Perform Dunn's test for pairwise comparisons and capture the outputinvisible(capture.output( dunn_result <-dunn.test(sqrt(results_IUCN_filtered$percentage_in_NT_MPA), results_IUCN_filtered$IUCN_status, method ="bonferroni")))# Create a function to add significance levels to the plotadd_significance <-function(p.value) {if(p.value <=0.001) return("***")elseif(p.value <=0.01) return("**")elseif(p.value <=0.05) return("*")elsereturn("ns")}# Create a data frame with pairwise comparisons and their significancesignificant_comparisons <-data.frame(group1 =sapply(strsplit(dunn_result$comparisons, " - "), `[`, 1),group2 =sapply(strsplit(dunn_result$comparisons, " - "), `[`, 2),p.value = dunn_result$P.adjusted,stringsAsFactors =FALSE)# Add significance levels to the data framesignificant_comparisons$significance <-sapply(significant_comparisons$p.value, add_significance)# Filter out non-significant comparisonssignificant_comparisons <- significant_comparisons[significant_comparisons$significance !="ns", ]# Calculate x positions for the significance barsiucn_levels <-c("LC", "NT", "VU", "EN", "CR")x_positions <-seq_along(iucn_levels)names(x_positions) <- iucn_levelssignificant_comparisons$x1 <- x_positions[significant_comparisons$group1]significant_comparisons$x2 <- x_positions[significant_comparisons$group2]# Sort comparisons based on the difference between group indicessignificant_comparisons$group_diff <-abs(significant_comparisons$x2 - significant_comparisons$x1)significant_comparisons <- significant_comparisons[order(significant_comparisons$group_diff, decreasing =TRUE), ]# Calculate y positions for the sorted comparisonssignificant_comparisons$y.position <-seq(max(results_IUCN_filtered$percentage_in_ALL_MPA, na.rm =TRUE) +5, by =5, length.out =nrow(significant_comparisons))# Recreate the base violin plotviolin_plot <-ggplot(results_IUCN_filtered, aes(x =factor(IUCN_status, levels = iucn_order), y = percentage_in_NT_MPA)) +geom_violin(aes(fill = IUCN_status, color = IUCN_status), trim =FALSE, alpha =0.5) +geom_jitter(width =0.1, size =0.5, alpha =0.5, color ="darkgray") +geom_boxplot(width =0.1, fill ="white", color ="black", outlier.shape =NA, alpha =0.8) +labs(x ="IUCN Status", y ="(%) Range within no-take MPAs") +theme_minimal() +theme(plot.title =element_text(hjust =0.5), legend.position ="none") +scale_x_discrete(limits = iucn_order) +scale_y_continuous(transform ="sqrt") +scale_fill_manual(values = iucn_colors) +scale_color_manual(values = iucn_colors)# Add significance bars and labelsviolin_plot_with_significance <- violin_plot +geom_segment(data = significant_comparisons,aes(x = x1, xend = x2, y = y.position, yend = y.position),inherit.aes =FALSE,color ="black", size =0.5) +geom_segment(data = significant_comparisons,aes(x = x1, xend = x1, y = y.position, yend = y.position -2),inherit.aes =FALSE,color ="black", size =0.5) +geom_segment(data = significant_comparisons,aes(x = x2, xend = x2, y = y.position, yend = y.position -2),inherit.aes =FALSE,color ="black", size =0.5) +geom_text(data = significant_comparisons,aes(x = (x1 + x2) /2, y = y.position, label = significance),inherit.aes =FALSE,vjust =-0.5, size =3)# Display the updated plotprint(violin_plot_with_significance)# Summary statisticssummary_stats <- results_IUCN_filtered %>%group_by(IUCN_status) %>%summarise(count =n(),mean =mean(percentage_in_NT_MPA, na.rm =TRUE),median =median(percentage_in_NT_MPA, na.rm =TRUE),sd =sd(percentage_in_NT_MPA, na.rm =TRUE) ) %>%arrange(factor(IUCN_status, levels = iucn_order))print(summary_stats)```**Key message:** Least concerned species are significantly more protected by all MPAs than any other IUCN Red List threat status category. We also note a similar relationship (LC vs VU, EN & CR) for no-take MPAs, however, after p-value adjustment for multiple comparison, differences were not significant for no-take MPAs.# Relate to species traitsRelate the percentage of species range overlapped by MPAs to the trait space of species. 1) Use Pearson correlation tests between the axes of the trait space and MPA coverage. 2) Predict relationships from a GAM into the kernel density gridded trait space.```{r}# Relate to species traits ----load(here::here("Data", "GAP analyses", "coords_1005.Rdata"))load(here::here("Data", "GAP analyses", "grids_commonspecies_corrected_021024.Rdata"))# Load the necessary librarylibrary(tibble)# Convert to tibble and add row names as the first columncoords <- tibble::rownames_to_column(coords, var ="Species")colnames(results)[1]="Species"results_coords=left_join(coords, results)library(Hmisc)library(dplyr)library(tidyr)# Calculate correlation matrix with p-valuescor_matrix_with_p <-rcorr(as.matrix(results_coords[, c("A1", "A2", "A3", "percentage_in_ALL_MPA", "percentage_in_NT_MPA")]))# Extract correlation coefficients and p-valuescor_coefficients <- cor_matrix_with_p$rp_values <- cor_matrix_with_p$P# Create a function to format the resultsformat_cor_p <-function(cor, p) {sprintf("%.3f (p = %.3f)", cor, p)}# Create a data frame with formatted resultsresult_df <-data.frame(Predictor =c("A1", "A2", "A3"),`percentage_in_ALL_MPA`=format_cor_p(cor_coefficients["percentage_in_ALL_MPA", c("A1", "A2", "A3")], p_values["percentage_in_ALL_MPA", c("A1", "A2", "A3")]),`percentage_in_NT_MPA`=format_cor_p(cor_coefficients["percentage_in_NT_MPA", c("A1", "A2", "A3")], p_values["percentage_in_NT_MPA", c("A1", "A2", "A3")]))library(knitr)library(kableExtra)format_correlation_table <-function(result_df) {kable(result_df, format ="html", escape =FALSE,col.names =c("Predictor", "ALL MPA", "NT MPA"),caption ="Pearson Correlation Results") %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed", "responsive"),full_width =FALSE) %>%column_spec(1, bold =TRUE) %>%add_header_above(c(" "=1, "Percentage in MPA"=2)) %>%footnote(general ="Values shown as: correlation coefficient (p-value)",general_title ="Note:")}# Usage:formatted_table <-format_correlation_table(result_df)formatted_tablelibrary(gridExtra)# Scatter plot for A1 vs percentage in ALL MPAplot_A1 <-ggplot(results_coords, aes(x = A1, y = percentage_in_ALL_MPA)) +geom_point(alpha =0.6) +geom_smooth(method ="lm", color ="red", se =TRUE) +theme_minimal() +labs(title ="A1 vs Percentage in ALL MPA",x ="A1",y ="Percentage in ALL MPA")# Scatter plot for A2 vs percentage in ALL MPAplot_A2 <-ggplot(results_coords, aes(x = A2, y = percentage_in_ALL_MPA)) +geom_point(alpha =0.6) +geom_smooth(method ="lm", color ="blue", se =TRUE) +theme_minimal() +labs(title ="A2 vs Percentage in ALL MPA",x ="A2",y ="Percentage in ALL MPA")# Set a common aspect ratio for both plotsplot_A1 <- plot_A1 +theme(aspect.ratio =1)plot_A2 <- plot_A2 +theme(aspect.ratio =1)# Create the combined plotcombined_plot <-grid.arrange(plot_A1, plot_A2, ncol =2)#Create kernel density of the trait space # Load required packageslibrary(BAT)# Assuming 'coords' is your dataframe# Extract the first two columns (A1 and A2)load(here::here("Data", "GAP analyses", "coords_1005.Rdata"))trait_space <- coords[, 1:2]sp_df=grids.fd_new# Get the species names from the trait spacetrait_species <-rownames(coords)# Subset the sp_df to keep only the columns (species) found in the trait spacesp_df_filtered <- sp_df[, colnames(sp_df) %in% trait_species]# Replace all NA values with 0sp_df_filtered[is.na(sp_df_filtered)] <-0# Create a global community matrixglobal_comm <-matrix(1, nrow =1, ncol =ncol(sp_df_filtered))colnames(global_comm) <-colnames(sp_df_filtered)rownames(global_comm) <-"global"global_kernel <- BAT::kernel.build(comm = global_comm, trait = trait_space,method ="gaussian")# Extract coordinates (trait values)trait_coords <- global_kernel@Data# Extract random points and their corresponding density valuesrandom_points <- global_kernel@RandomPointsdensity_values <- global_kernel@ValueAtRandomPoints# Create a data frame for the density plotplot_data <-data.frame(A1 = random_points[,1],A2 = random_points[,2],Density = density_values)# Create a data frame for the original trait pointstrait_data <-data.frame(A1 = trait_coords[,1],A2 = trait_coords[,2])#Build the GAM library(mgcv)library(dplyr)# Prepare data for GAMgam_data <- results_coords %>%select(Species, A1, A2, percentage_in_ALL_MPA) %>%filter(!is.na(percentage_in_ALL_MPA)) # Remove any rows with NA in the response variable# Build GAMgam_model <-gam(percentage_in_ALL_MPA ~s(A1, A2), data = gam_data, method ="REML")summary(gam_model)# Make predictionsplot_data$predicted <-predict(gam_model, newdata = plot_data, type ="response")library(ggplot2)library(viridis)# Plot predictionsgam_plot <-ggplot() +geom_point(data = plot_data, aes(x = A1, y = A2, color = predicted), alpha =0.5) +#geom_point(data = gam_data, aes(x = A1, y = A2), color = "red", size = 2) +scale_color_viridis_c(name ="Predicted % MPA Coverage") +theme_minimal() +labs(title ="GAM Predictions: Percentage in ALL MPA",x ="Trait Axis 1",y ="Trait Axis 2")print(gam_plot)# Create a smoother surface plotgam_density_plot <-ggplot(plot_data, aes(x = A1, y = A2, z = predicted)) +stat_summary_2d(fun = mean, bins =50) +scale_fill_viridis_c(name ="Predicted % MPA Coverage") +theme_minimal() +labs(title ="GAM Predictions: Percentage in ALL MPA",x ="Trait Axis 1",y ="Trait Axis 2")print(gam_density_plot)```**Key message:** There is a significant, relationship between the percentage of the range of sharks and rays covered by MPAs and axis 1 & 2 of the trait space. Species located in the bottom right corner of the trait space tend to be more protected, this relationship is highly significant, however, very week (R2 = 0.04, p \< 0.001).# Relate to phylogenyRelate the percentage of species range overlapped by MPAs to the phylogenetic tree of species, with tests of phylogenetic signal and plots of trees with the variable mapped onto the tree.```{r}# Relate to phylogeny ----# For percentage range in ALL MPAs library(phytools)library(here)library(dplyr)library(purrr)library(ape)library(stringr)# Load the list of treesload(here::here("Data", "GAP analyses", "list_tree_sharks_p.Rdata"))# Function to modify species namesmodify_species_name <-function(name) {str_replace(name, " ", "_")}# Modify species names in the results dataframeresults_IUCN_filtered <- results_IUCN_filtered %>%mutate(species_modified =modify_species_name(species))compute_phylo_signal <-function(tree, data) {# Ensure the data is in the same order as the tree tips and remove NAs matched_data <- data %>%filter(species_modified %in% tree$tip.label) %>%filter(!is.na(percentage_in_ALL_MPA)) %>%arrange(match(species_modified, tree$tip.label)) n_species_after_matching <-nrow(matched_data)# Prune the tree to match the available data pruned_tree <-keep.tip(tree, matched_data$species_modified) n_tips_pruned_tree <-length(pruned_tree$tip.label)# Compute phylogenetic signal using Pagel's lambda, capturing any output signal <-tryCatch({ captured_output <-capture.output({ result <-phylosig(pruned_tree, matched_data$percentage_in_ALL_MPA, method ="lambda", test =TRUE) }) result$captured_output <- captured_output result }, error =function(e) { error_message <-paste("Error in phylosig:", e$message)return(list(lambda =NA, P =NA, error = error_message, captured_output =NA)) })# Return all informationreturn(list(lambda = signal$lambda,p_value = signal$P,n_species = n_species_after_matching,n_tips_pruned_tree = n_tips_pruned_tree,error =if(!is.null(signal$error)) signal$error elseNA,captured_output =paste(signal$captured_output, collapse ="\n") ))}# Apply the function to all trees: using the first two only for the moment to save timeresults <-map_dfr(list_tree_sharks_p[1:2], ~compute_phylo_signal(.x, results_IUCN_filtered), .id ="tree")# Now, create the kable separatelylibrary(knitr)library(dplyr)library(kableExtra)results %>%select(1:4) %>%kable(caption ="Phylogenetic Signal Results for all MPAs",col.names =c("Tree", "Lambda", "P-value", "Number of Species"),digits =3,align =c('l', 'c', 'c', 'c')) %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed", "responsive"))# Compute summary statisticssummary_stats <- results %>%summarise(mean_lambda =mean(lambda, na.rm =TRUE),median_lambda =median(lambda, na.rm =TRUE),sd_lambda =sd(lambda, na.rm =TRUE),mean_p =mean(p_value, na.rm =TRUE),median_p =median(p_value, na.rm =TRUE),sd_p =sd(p_value, na.rm =TRUE),mean_n_species =mean(n_species, na.rm =TRUE),min_n_species =min(n_species, na.rm =TRUE),max_n_species =max(n_species, na.rm =TRUE) )#print(summary_stats)# For percentage range in no-take MPAs library(phytools)library(here)library(dplyr)library(purrr)library(ape)library(stringr)# Load the list of treesload(here::here("Data", "GAP analyses", "list_tree_sharks_p.Rdata"))# Function to modify species namesmodify_species_name <-function(name) {str_replace(name, " ", "_")}# Modify species names in the results dataframeresults_IUCN_filtered <- results_IUCN_filtered %>%mutate(species_modified =modify_species_name(species))compute_phylo_signal <-function(tree, data) {# Ensure the data is in the same order as the tree tips and remove NAs matched_data <- data %>%filter(species_modified %in% tree$tip.label) %>%filter(!is.na(percentage_in_NT_MPA)) %>%arrange(match(species_modified, tree$tip.label)) n_species_after_matching <-nrow(matched_data)# Prune the tree to match the available data pruned_tree <-keep.tip(tree, matched_data$species_modified) n_tips_pruned_tree <-length(pruned_tree$tip.label)# Compute phylogenetic signal using Pagel's lambda, capturing any output signal <-tryCatch({ captured_output <-capture.output({ result <-phylosig(pruned_tree, matched_data$percentage_in_NT_MPA, method ="lambda", test =TRUE) }) result$captured_output <- captured_output result }, error =function(e) { error_message <-paste("Error in phylosig:", e$message)return(list(lambda =NA, P =NA, error = error_message, captured_output =NA)) })# Return all informationreturn(list(lambda = signal$lambda,p_value = signal$P,n_species = n_species_after_matching,n_tips_pruned_tree = n_tips_pruned_tree,error =if(!is.null(signal$error)) signal$error elseNA,captured_output =paste(signal$captured_output, collapse ="\n") ))}# Apply the function to all trees: use only the first two trees for the moment to save timeresults <-map_dfr(list_tree_sharks_p[1:2], ~compute_phylo_signal(.x, results_IUCN_filtered), .id ="tree")# Now, create the kable separatelylibrary(knitr)library(dplyr)library(kableExtra)results %>%select(1:4) %>%kable(caption ="Phylogenetic Signal Results for no-take MPAs",col.names =c("Tree", "Lambda", "P-value", "Number of Species"),digits =3,align =c('l', 'c', 'c', 'c')) %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed", "responsive"))# Compute summary statisticssummary_stats <- results %>%summarise(mean_lambda =mean(lambda, na.rm =TRUE),median_lambda =median(lambda, na.rm =TRUE),sd_lambda =sd(lambda, na.rm =TRUE),mean_p =mean(p_value, na.rm =TRUE),median_p =median(p_value, na.rm =TRUE),sd_p =sd(p_value, na.rm =TRUE),mean_n_species =mean(n_species, na.rm =TRUE),min_n_species =min(n_species, na.rm =TRUE),max_n_species =max(n_species, na.rm =TRUE) )#print(summary_stats)#Plot the tree with color gradient on terminal branches library(ggtree)library(ggplot2)library(dplyr)library(RColorBrewer)library(tidytree)# Function to get a color paletteget_color_palette <-function(n) {colorRampPalette(brewer.pal(9, "YlGnBu"))(n) # Changed to YlGnBu}# All MPAs # Function to plot the circular treeplot_circular_colored_tree <-function(tree, data) {# Ensure the data is in the same order as the tree tips and remove NAs matched_data <- data %>%filter(species_modified %in% tree$tip.label) %>%filter(!is.na(percentage_in_ALL_MPA)) %>%arrange(match(species_modified, tree$tip.label))# Print summary of percentage_in_ALL_MPA for debugging#print(summary(matched_data$percentage_in_ALL_MPA))# Prune the tree to match the available data pruned_tree <- ape::keep.tip(tree, matched_data$species_modified)# Create color palette n_colors <-100 color_palette <-get_color_palette(n_colors)# Convert tree to tidy format and add percentage data tree_data <-as_tibble(pruned_tree) %>%left_join(matched_data, by =c("label"="species_modified"))# Plot the circular tree p <-ggtree(pruned_tree, layout="circular", aes(color=percentage_in_ALL_MPA), size =0.3) %<+% tree_data# Color the branches p <- p +scale_color_gradientn(colours = color_palette, name ="% Range within all MPAs",limits =c(0, 100),na.value ="grey50") +theme(legend.position ="right")# Remove default labels and add a title p <- p +theme(plot.title =element_text(hjust =0.5)) return(p)}# Plot the first treetree_plot_ALL <-plot_circular_colored_tree(list_tree_sharks_p[[1]], results_IUCN_filtered)# Display the plotprint(tree_plot_ALL)# No-take MPAs # Function to plot the circular treeplot_circular_colored_tree <-function(tree, data) {# Ensure the data is in the same order as the tree tips and remove NAs matched_data <- data %>%filter(species_modified %in% tree$tip.label) %>%filter(!is.na(percentage_in_NT_MPA)) %>%arrange(match(species_modified, tree$tip.label))# Print summary of percentage_in_ALL_MPA for debugging#print(summary(matched_data$percentage_in_NT_MPA))# Prune the tree to match the available data pruned_tree <- ape::keep.tip(tree, matched_data$species_modified)# Create color palette n_colors <-100 color_palette <-get_color_palette(n_colors)# Convert tree to tidy format and add percentage data tree_data <-as_tibble(pruned_tree) %>%left_join(matched_data, by =c("label"="species_modified"))# Plot the circular tree p <-ggtree(pruned_tree, layout="circular", aes(color=percentage_in_NT_MPA), size =0.3) %<+% tree_data# Color the branches p <- p +scale_color_gradientn(colours = color_palette, name ="% Range within no-take MPAs",limits =c(0, 100),na.value ="grey50") +theme(legend.position ="right")# Remove default labels and add a title p <- p +theme(plot.title =element_text(hjust =0.5)) return(p)}# Plot the first treetree_plot_NT <-plot_circular_colored_tree(list_tree_sharks_p[[1]], results_IUCN_filtered)# Display the plotprint(tree_plot_NT)#ggpubr::ggarrange(tree_plot_ALL, tree_plot_NT, common.legend = T, legend = "bottom")```**Key message:** There are significant phylogenetic signals with the percentage of MPA coverage for all MPAs (lambda= 0.12-0.17; \< 0.05), and for no-take MPAs, this relationship is almost significant (lambda = 0.04; 0.08 \> p \> 0.05). However, patterns are not obvious on the trees. Analyses were ran for two trees only but can be replicated to all 100 trees.# Relate to FUSE and EDGE2Relate the percentage of species range overlapped by MPAs to FUSE and EDGE2 ```{r}library(jsonlite)library(here)library(ggplot2)library(dplyr)library(purrr)library(patchwork)library(knitr)library(kableExtra)# Read the JSON filedata <-fromJSON(here("Data", "shark_conservation_metrics_no_freshwater.json"))# Function to process data and create plotprocess_and_plot <-function(metric_name) {# Merge and process data combined_data <- data[[metric_name]]$info %>%left_join(results_coords, by ="Species") %>%mutate(across(where(is.list), unlist),across(where(is.character), as.factor),!!metric_name :=pmin(pmax(!!sym(metric_name), 0), 1))# Create plot plot <-ggplot(combined_data, aes_string(x = metric_name, y ="percentage_in_ALL_MPA")) +geom_point(alpha =0.6) +geom_smooth(method ="lm", color ="red", se =TRUE) +theme_minimal() +labs(title =paste(metric_name, "vs % Range Protected"),x = metric_name,y ="% Range Protected") +scale_x_continuous(limits =c(0, 1), breaks =seq(0, 1, 0.2)) +scale_y_continuous(limits =c(0, 100), breaks =seq(0, 100, 20)) +theme(aspect.ratio =1)# Calculate correlation cor_test <-cor.test(combined_data[[metric_name]], combined_data$percentage_in_ALL_MPA, use ="complete.obs")return(list(plot = plot, cor_test = cor_test))}# Process and plot for EDGE2 and FUSEedge2_results <-process_and_plot("EDGE2") fuse_results <-process_and_plot("FUSE") # Combine plots in a grid with 2 columnscombined_plot <- edge2_results$plot + fuse_results$plot +plot_layout(ncol =2) +plot_annotation(title =NULL,theme =theme(plot.title =element_text(hjust =0.5, size =16)) )# Print the combined plotprint(combined_plot)# Create a data frame with correlation test resultscorrelation_results <-data.frame(Metric =c("EDGE2", "FUSE"),Correlation =c(edge2_results$cor_test$estimate, fuse_results$cor_test$estimate),P_value =c(edge2_results$cor_test$p.value, fuse_results$cor_test$p.value))# Create and print the kable tablekable_table <-kable(correlation_results, col.names =c("Metric", "Correlation", "P-value"),digits =c(0, 3, 4),caption ="Correlation Test Results") %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed"),full_width =FALSE)kable_table```**Key message:** There is significant negative relationship between EDGE2 and FUSE with the % of range protected. Species with high FUSE or high EDGE2 tend to be less protected than those with low EDGE2 or FUSE.